# ============================================================
# Function to automatically install and load required packages
# ============================================================
load_or_install <- function(package_names) {
  for (pkg in package_names) {
    if (!require(pkg, character.only = TRUE)) {
      install.packages(pkg, dependencies = TRUE)
      library(pkg, character.only = TRUE)
    }
  }
}

# ============================================================
# Define and load required R packages
# ============================================================
needed_pkgs <- c(
  "rcompanion", "effsize", "pwr", "Hmisc",
  "ggplot2", "ggpubr", "dplyr", "xtable",
  "PMCMRplus", "car", "ggbeeswarm",
  "readxl", "writexl"
)
load_or_install(needed_pkgs)

# Explicit library calls for clarity and reproducibility
library(ggplot2)
library(ggpubr)
library(dplyr)
library(xtable)
library(PMCMRplus)
library(car)
library(ggbeeswarm)
library(readxl)
library(effsize)
library(pwr)
library(rcompanion)
library(Hmisc)
library(writexl)

# ============================================================
# Load data from Excel file
# ============================================================
data_full <- read_excel(
  "C:\\Users\\CardiovascularDiabetology.xlsx",
  sheet = "GroupComparison"
)

# ============================================================
# Assign group labels based on row indices
# ============================================================
data_full <- data_full %>%
  mutate(Group = case_when(
    row_number() >= 1  & row_number() <= 37 ~ "CWO",
    row_number() >= 39 & row_number() <= 63 ~ "T2D",
    row_number() >= 65 & row_number() <= 76 ~ "LI",
    TRUE ~ NA_character_
  ))

# ============================================================
# Remove rows without valid group assignment
# ============================================================
analysis_sheet <- data_full %>% filter(!is.na(Group))

# ============================================================
# Convert Group to a factor and set desired order
# ============================================================
analysis_sheet$Group <- factor(
  analysis_sheet$Group,
  levels = c("LI", "CWO", "T2D")
)

# ============================================================
# Helper function to convert p-values to significance markers
# ============================================================
get_sig_marker <- function(p) {
  if (is.na(p)) {
    return("NA")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")
  }
}

# ============================================================
# Initialize list to store results for all variables
# ============================================================
output_list <- list()

# ============================================================
# Function to analyze a single variable
# ============================================================
analyze_variable <- function(variable_name) {
  
  # Extract Group and target variable
  variable_data <- analysis_sheet[, c("Group", variable_name)]
  names(variable_data)[2] <- "Value"
  
  # Convert values to numeric and remove missing data
  variable_data$Value <- as.numeric(as.character(variable_data$Value))
  variable_data <- variable_data %>% filter(!is.na(Value))
  
  # ----------------------------------------------------------
  # Compute summary statistics by group
  # ----------------------------------------------------------
  summary_stats <- variable_data %>%
    group_by(Group) %>%
    summarise(
      Count = sum(!is.na(Value)),
      MeanSD = paste0(
        round(mean(Value, na.rm = TRUE), 2),
        " ± ",
        round(sd(Value, na.rm = TRUE), 2)
      ),
      MedianIQR = paste0(
        round(median(Value, na.rm = TRUE), 2),
        " (",
        round(quantile(Value, 0.25, na.rm = TRUE), 2),
        "–",
        round(quantile(Value, 0.75, na.rm = TRUE), 2),
        ")"
      ),
      .groups = "drop"
    )
  
  # ----------------------------------------------------------
  # Initialize p-values
  # ----------------------------------------------------------
  p_LI_CWO <- NA
  p_LI_T2D <- NA
  p_CWO_T2D <- NA
  overall_p <- NA
  
  # ----------------------------------------------------------
  # Normality testing using Shapiro-Wilk test
  # ----------------------------------------------------------
  normal_data <- all(
    sapply(levels(variable_data$Group), function(g) {
      vals <- variable_data$Value[variable_data$Group == g]
      if (length(vals) >= 3 && length(vals) <= 5000) {
        shapiro.test(vals)$p.value > 0.05
      } else {
        FALSE
      }
    })
  )
  
  # ----------------------------------------------------------
  # Helper function to extract group statistics
  # ----------------------------------------------------------
  get_group_stat <- function(group_name, type = "mean") {
    stat <- summary_stats %>% filter(Group == group_name)
    if (nrow(stat) == 0) return(NA)
    if (type == "mean") {
      return(stat$MeanSD)
    } else {
      return(stat$MedianIQR)
    }
  }
  
  # ----------------------------------------------------------
  # Parametric analysis
  # ----------------------------------------------------------
  if (normal_data) {
    
    anova_model <- aov(Value ~ Group, data = variable_data)
    anova_results <- summary(anova_model)
    overall_p <- round(anova_results[[1]]$`Pr(>F)`[1], 4)
    
    if (overall_p < 0.05) {
      pw_test <- pairwise.t.test(
        variable_data$Value,
        variable_data$Group,
        p.adjust.method = "holm",
        var.equal = TRUE
      )
      p_LI_CWO <- round(pw_test$p.value["CWO", "LI"], 4)
      p_LI_T2D <- round(pw_test$p.value["T2D", "LI"], 4)
      p_CWO_T2D <- round(pw_test$p.value["T2D", "CWO"], 4)
    }
    
    stat_type <- "mean"
    
    # ----------------------------------------------------------
    # Non-parametric analysis
    # ----------------------------------------------------------
  } else {
    
    kw_test <- kruskal.test(Value ~ Group, data = variable_data)
    overall_p <- round(kw_test$p.value, 4)
    
    if (overall_p < 0.05) {
      dunn_res <- PMCMRplus::kwAllPairsDunnTest(
        Value ~ Group,
        data = variable_data,
        p.adjust.method = "holm"
      )
      pmat <- dunn_res$p.value
      p_LI_CWO  <- round(pmat["CWO", "LI"], 4)
      p_LI_T2D  <- round(pmat["T2D", "LI"], 4)
      p_CWO_T2D <- round(pmat["T2D", "CWO"], 4)
    }
    
    stat_type <- "median"
  }
  
  # ----------------------------------------------------------
  # Store results for the current variable
  # ----------------------------------------------------------
  result_df <- data.frame(
    Variable = variable_name,
    LI = get_group_stat("LI", stat_type),
    CWO = get_group_stat("CWO", stat_type),
    T2D = get_group_stat("T2D", stat_type),
    Overall_P = overall_p,
    LI_vs_CWO = p_LI_CWO,
    LI_vs_T2D = p_LI_T2D,
    CWO_vs_T2D = p_CWO_T2D,
    N = paste0(
      "(",
      summary_stats$Count[summary_stats$Group == "LI"], ", ",
      summary_stats$Count[summary_stats$Group == "CWO"], ", ",
      summary_stats$Count[summary_stats$Group == "T2D"],
      ")"
    )
  )
  
  # Append result to output list
  output_list[[variable_name]] <<- result_df
}

# ============================================================
# Run analysis for selected variables
# ============================================================
variables <- colnames(analysis_sheet)[2:13]
for (variable in variables) {
  analyze_variable(variable)
}

# ============================================================
# Combine all results and save to Excel
# ============================================================
final_output <- bind_rows(output_list)

write_xlsx(
  final_output,
  "C:/Users/CardiovascularDiabetology_results.xlsx"
)